import time

import requests
from lxml import etree
from requests.exceptions import ProxyError


def get_proxies():
	"""Fetch one proxy address from the kdlapi.com endpoint.

	The response body is used as the proxy address and wrapped in the
	mapping shape that ``requests`` expects for its ``proxies=`` argument.

	Returns:
		dict: ``{"http": "http://<address>", "https": "http://<address>"}``.

	Raises:
		requests.RequestException: if the API call times out, cannot
			connect, or answers with an HTTP error status.
	"""
	# NOTE(review): secret_id and signature are "?" in this URL — presumably
	# placeholders that must be filled in with real credentials; confirm.
	res = requests.get(
		"https://dps.kdlapi.com/api/getdps/?secret_id=?&signature=?&num=1&pt=1&format=text&sep=1&f_auth=1&generateType=4",
		# Without a timeout a stalled connection would block the crawl forever.
		timeout=10,
	)
	# Fail loudly rather than turning an HTTP error page into a proxy URL.
	res.raise_for_status()
	# Drop surrounding whitespace/newlines so they do not end up in the URL.
	address = res.text.strip()
	proxies = {
		"http": f"http://{address}",
		"https": f"http://{address}",
	}
	print(f"获取代理{proxies}。。。。。")
	# To check a proxy by hand, request http://httpbin.org/get through it and
	# compare the reported "origin" with the proxy address.
	return proxies


# Browser-like User-Agent header sent with every listing request.
headers = {
	"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
}
# Value of the `start` query parameter; advanced by 25 after each fetched page
# and the crawl stops once it reaches 250.
start = 0
proxies = get_proxies()
while True:
	try:
		res = requests.get(
			f"https://movie.douban.com/top250?start={start}&filter=",
			headers=headers,
			proxies=proxies,
			# Without a timeout a dead proxy can block this call indefinitely.
			timeout=15,
		)
		# Skip parsing a blank body and tolerate a parser that yields no tree,
		# so a missing <title> is handled below instead of crashing.
		tree = etree.HTML(res.text) if res.text.strip() else None
		titles = tree.xpath('//title/text()') if tree is not None else []
		if not titles:
			# A body without a <title> (empty, or a bare status string) is
			# not a listing page; rotate the proxy instead of raising
			# IndexError on titles[0].
			raise ProxyError("Response has no <title>; switching proxy")
		if "登录跳转页" in titles[0]:
			raise ProxyError("有异常请求从你的 IP 发出，请 登录 使用豆瓣")
		items = tree.xpath('//ol[@class="grid_view"]/li')
		# Only treat "004" as an error marker when no entries were found: a
		# normal listing page can contain those digits incidentally (for
		# example inside a year such as 2004), which must not discard the page.
		if not items and "004" in res.text:
			raise ProxyError("返回内容004")
	except requests.RequestException as exc:
		# ProxyError is a RequestException, so this covers both the checks
		# above and timeouts / connection / SSL failures caused by a bad proxy.
		print(exc)
		proxies = get_proxies()
		continue

	print(f"发现目标{len(items)}个")
	start += 25
	# ">=" so the loop still terminates if the offset ever skips past 250.
	if start >= 250:
		print("爬取结束...........")
		break

	# Pause between successful page fetches.
	time.sleep(20)